def configure_plotly_browser_state():
    """Display the RequireJS setup snippet that registers the Plotly module path.

    The emitted HTML loads RequireJS from the notebook server's static
    directory and configures two module paths: ``base`` (``/static/base``)
    and ``plotly`` (a pinned build served from the Plotly CDN).

    Returns:
        None. The snippet is sent to the active IPython display.
    """
    # Imported explicitly from the public ``IPython.display`` module instead of
    # relying on the ``display`` global that IPython only injects into
    # interactive sessions; without this the call raises NameError elsewhere.
    # Kept function-local, like the original ``import IPython``.
    from IPython.display import HTML, display

    # NOTE(review): the CDN URL pins plotly.js 1.5.1 -- confirm it matches the
    # plotly version this notebook is expected to render with.
    display(HTML('''
<script src="/static/components/requirejs/require.js"></script>
<script>
requirejs.config({
paths: {
base: '/static/base',
plotly: 'https://cdn.plot.ly/plotly-1.5.1.min.js?noext',
},
});
</script>
'''))
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
from matplotlib import pyplot as plt
from pandas_profiling import ProfileReport
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree
from sklearn.impute import KNNImputer
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
# Display settings: show every column and render floats with three decimals.
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', lambda value: '%.3f' % value)

# Read the comma-separated dataset from the local machine into ``data``.
data = pd.read_csv(
    r'C:\Users\joaow.000\Documents\ml_project1_data.csv',
    sep=',',
)
# Trailing bare expression: in a notebook this shows the frame as cell output.
data
|  | ID | Year_Birth | Education | Marital_Status | Income | Kidhome | Teenhome | Dt_Customer | Recency | MntWines | MntFruits | MntMeatProducts | MntFishProducts | MntSweetProducts | MntGoldProds | NumDealsPurchases | NumWebPurchases | NumCatalogPurchases | NumStorePurchases | NumWebVisitsMonth | AcceptedCmp3 | AcceptedCmp4 | AcceptedCmp5 | AcceptedCmp1 | AcceptedCmp2 | Complain | Z_CostContact | Z_Revenue | Response |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5524 | 1957 | Graduation | Single | 58138.000 | 0 | 0 | 2012-09-04 | 58 | 635 | 88 | 546 | 172 | 88 | 88 | 3 | 8 | 10 | 4 | 7 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 11 | 1 |
| 1 | 2174 | 1954 | Graduation | Single | 46344.000 | 1 | 1 | 2014-03-08 | 38 | 11 | 1 | 6 | 2 | 1 | 6 | 2 | 1 | 1 | 2 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 11 | 0 |
| 2 | 4141 | 1965 | Graduation | Together | 71613.000 | 0 | 0 | 2013-08-21 | 26 | 426 | 49 | 127 | 111 | 21 | 42 | 1 | 8 | 2 | 10 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 11 | 0 |
| 3 | 6182 | 1984 | Graduation | Together | 26646.000 | 1 | 0 | 2014-02-10 | 26 | 11 | 4 | 20 | 10 | 3 | 5 | 2 | 2 | 0 | 4 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 11 | 0 |
| 4 | 5324 | 1981 | PhD | Married | 58293.000 | 1 | 0 | 2014-01-19 | 94 | 173 | 43 | 118 | 46 | 27 | 15 | 5 | 5 | 3 | 6 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 11 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2235 | 10870 | 1967 | Graduation | Married | 61223.000 | 0 | 1 | 2013-06-13 | 46 | 709 | 43 | 182 | 42 | 118 | 247 | 2 | 9 | 3 | 4 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 11 | 0 |
| 2236 | 4001 | 1946 | PhD | Together | 64014.000 | 2 | 1 | 2014-06-10 | 56 | 406 | 0 | 30 | 0 | 0 | 8 | 7 | 8 | 2 | 5 | 7 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 11 | 0 |
| 2237 | 7270 | 1981 | Graduation | Divorced | 56981.000 | 0 | 0 | 2014-01-25 | 91 | 908 | 48 | 217 | 32 | 12 | 24 | 1 | 2 | 3 | 13 | 6 | 0 | 1 | 0 | 0 | 0 | 0 | 3 | 11 | 0 |
| 2238 | 8235 | 1956 | Master | Together | 69245.000 | 0 | 1 | 2014-01-24 | 8 | 428 | 30 | 214 | 80 | 30 | 61 | 2 | 6 | 5 | 10 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 11 | 0 |
| 2239 | 9405 | 1954 | PhD | Married | 52869.000 | 1 | 1 | 2012-10-15 | 40 | 84 | 3 | 61 | 2 | 1 | 21 | 3 | 3 | 1 | 4 | 7 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 11 | 1 |
2240 rows × 29 columns
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of ``data``; as a trailing expression it is shown as the cell output.
data.describe()
|  | ID | Year_Birth | Income | Kidhome | Teenhome | Recency | MntWines | MntFruits | MntMeatProducts | MntFishProducts | MntSweetProducts | MntGoldProds | NumDealsPurchases | NumWebPurchases | NumCatalogPurchases | NumStorePurchases | NumWebVisitsMonth | AcceptedCmp3 | AcceptedCmp4 | AcceptedCmp5 | AcceptedCmp1 | AcceptedCmp2 | Complain | Z_CostContact | Z_Revenue | Response |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 2240.000 | 2240.000 | 2216.000 | 2240.000 | 2240.000 | 2240.000 | 2240.000 | 2240.000 | 2240.000 | 2240.000 | 2240.000 | 2240.000 | 2240.000 | 2240.000 | 2240.000 | 2240.000 | 2240.000 | 2240.000 | 2240.000 | 2240.000 | 2240.000 | 2240.000 | 2240.000 | 2240.000 | 2240.000 | 2240.000 |
| mean | 5592.160 | 1968.806 | 52247.251 | 0.444 | 0.506 | 49.109 | 303.936 | 26.302 | 166.950 | 37.525 | 27.063 | 44.022 | 2.325 | 4.085 | 2.662 | 5.790 | 5.317 | 0.073 | 0.075 | 0.073 | 0.064 | 0.013 | 0.009 | 3.000 | 11.000 | 0.149 |
| std | 3246.662 | 11.984 | 25173.077 | 0.538 | 0.545 | 28.962 | 336.597 | 39.773 | 225.715 | 54.629 | 41.280 | 52.167 | 1.932 | 2.779 | 2.923 | 3.251 | 2.427 | 0.260 | 0.263 | 0.260 | 0.245 | 0.115 | 0.096 | 0.000 | 0.000 | 0.356 |
| min | 0.000 | 1893.000 | 1730.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 3.000 | 11.000 | 0.000 |
| 25% | 2828.250 | 1959.000 | 35303.000 | 0.000 | 0.000 | 24.000 | 23.750 | 1.000 | 16.000 | 3.000 | 1.000 | 9.000 | 1.000 | 2.000 | 0.000 | 3.000 | 3.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 3.000 | 11.000 | 0.000 |
| 50% | 5458.500 | 1970.000 | 51381.500 | 0.000 | 0.000 | 49.000 | 173.500 | 8.000 | 67.000 | 12.000 | 8.000 | 24.000 | 2.000 | 4.000 | 2.000 | 5.000 | 6.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 3.000 | 11.000 | 0.000 |
| 75% | 8427.750 | 1977.000 | 68522.000 | 1.000 | 1.000 | 74.000 | 504.250 | 33.000 | 232.000 | 50.000 | 33.000 | 56.000 | 3.000 | 6.000 | 4.000 | 8.000 | 7.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 3.000 | 11.000 | 0.000 |
| max | 11191.000 | 1996.000 | 666666.000 | 2.000 | 2.000 | 99.000 | 1493.000 | 199.000 | 1725.000 | 259.000 | 263.000 | 362.000 | 15.000 | 27.000 | 28.000 | 13.000 | 20.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 3.000 | 11.000 | 1.000 |
# Build a pandas-profiling report over the full ``data`` frame, passing an
# HTML style option with ``full_width`` set to True.
profile = ProfileReport(data, html={'style':{'full_width':True}})
# NOTE(review): presumably renders the report inline in the notebook as an
# iframe (going by the method name) -- confirm against the pandas-profiling docs.
profile.to_notebook_iframe()